47 files changed, 589 insertions, 486 deletions
diff --git a/contrib/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_netbsd.cc b/contrib/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_netbsd.cc
index c112e044b1d8..c32c80b3e48b 100644
--- a/contrib/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_netbsd.cc
+++ b/contrib/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_netbsd.cc
@@ -124,7 +124,9 @@
 #include <dev/isa/isvio.h>
 #include <dev/isa/wtreg.h>
 #include <dev/iscsi/iscsi_ioctl.h>
+#if 0
 #include <dev/nvmm/nvmm_ioctl.h>
+#endif
 #include <dev/ofw/openfirmio.h>
 #include <dev/pci/amrio.h>
 #include <dev/pci/mlyreg.h>
diff --git a/contrib/libunwind/src/UnwindRegistersRestore.S b/contrib/libunwind/src/UnwindRegistersRestore.S
index 9c9f481b10dd..518eef7a74b5 100644
--- a/contrib/libunwind/src/UnwindRegistersRestore.S
+++ b/contrib/libunwind/src/UnwindRegistersRestore.S
@@ -396,119 +396,119 @@ Lnovec:
 #elif defined(__ppc__)
 
 DEFINE_LIBUNWIND_PRIVATE_FUNCTION(_ZN9libunwind13Registers_ppc6jumptoEv)
-;
-; void libunwind::Registers_ppc::jumpto()
-;
-; On entry:
-;  thread_state pointer is in r3
-;
-
-  ; restore integral registerrs
-  ; skip r0 for now
-  ; skip r1 for now
-  lwz     r2, 16(r3)
-  ; skip r3 for now
-  ; skip r4 for now
-  ; skip r5 for now
-  lwz     r6, 32(r3)
-  lwz     r7, 36(r3)
-  lwz     r8, 40(r3)
-  lwz     r9, 44(r3)
-  lwz    r10, 48(r3)
-  lwz    r11, 52(r3)
-  lwz    r12, 56(r3)
-  lwz    r13, 60(r3)
-  lwz    r14, 64(r3)
-  lwz    r15, 68(r3)
-  lwz    r16, 72(r3)
-  lwz    r17, 76(r3)
-  lwz    r18, 80(r3)
-  lwz    r19, 84(r3)
-  lwz    r20, 88(r3)
-  lwz    r21, 92(r3)
-  lwz    r22, 96(r3)
-  lwz    r23,100(r3)
-  lwz    r24,104(r3)
-  lwz    r25,108(r3)
-  lwz    r26,112(r3)
-  lwz    r27,116(r3)
-  lwz    r28,120(r3)
-  lwz    r29,124(r3)
-  lwz    r30,128(r3)
-  lwz    r31,132(r3)
-
-  ; restore float registers
-  lfd    f0, 160(r3)
-  lfd    f1, 168(r3)
-  lfd    f2, 176(r3)
-  lfd    f3, 184(r3)
-  lfd    f4, 192(r3)
-  lfd    f5, 200(r3)
-  lfd    f6, 208(r3)
-  lfd    f7, 216(r3)
-  lfd    f8, 224(r3)
-  lfd    f9, 232(r3)
-  lfd    f10,240(r3)
-  lfd    f11,248(r3)
-  lfd    f12,256(r3)
-  lfd    f13,264(r3)
-  lfd    f14,272(r3)
-  lfd    f15,280(r3)
-  lfd    f16,288(r3)
-  lfd    f17,296(r3)
-  lfd    f18,304(r3)
-  lfd    f19,312(r3)
-  lfd    f20,320(r3)
-  lfd    f21,328(r3)
-  lfd    f22,336(r3)
-  lfd    f23,344(r3)
-  lfd    f24,352(r3)
-  lfd    f25,360(r3)
-  lfd    f26,368(r3)
-  lfd    f27,376(r3)
-  lfd    f28,384(r3)
-  lfd    f29,392(r3)
-  lfd    f30,400(r3)
-  lfd    f31,408(r3)
-
-  ; restore vector registers if any are in use
-  lwz    r5,156(r3)  ; test VRsave
-  cmpwi  r5,0
-  beq    Lnovec
-
-  subi  r4,r1,16
-  rlwinm  r4,r4,0,0,27  ; mask low 4-bits
-  ; r4 is now a 16-byte aligned pointer into the red zone
-  ; the _vectorRegisters may not be 16-byte aligned so copy via red zone temp buffer
+//
+// void libunwind::Registers_ppc::jumpto()
+//
+// On entry:
+//  thread_state pointer is in r3
+//
+
+  // restore integral registerrs
+  // skip r0 for now
+  // skip r1 for now
+  lwz     %r2,  16(%r3)
+  // skip r3 for now
+  // skip r4 for now
+  // skip r5 for now
+  lwz     %r6,  32(%r3)
+  lwz     %r7,  36(%r3)
+  lwz     %r8,  40(%r3)
+  lwz     %r9,  44(%r3)
+  lwz     %r10, 48(%r3)
+  lwz     %r11, 52(%r3)
+  lwz     %r12, 56(%r3)
+  lwz     %r13, 60(%r3)
+  lwz     %r14, 64(%r3)
+  lwz     %r15, 68(%r3)
+  lwz     %r16, 72(%r3)
+  lwz     %r17, 76(%r3)
+  lwz     %r18, 80(%r3)
+  lwz     %r19, 84(%r3)
+  lwz     %r20, 88(%r3)
+  lwz     %r21, 92(%r3)
+  lwz     %r22, 96(%r3)
+  lwz     %r23,100(%r3)
+  lwz     %r24,104(%r3)
+  lwz     %r25,108(%r3)
+  lwz     %r26,112(%r3)
+  lwz     %r27,116(%r3)
+  lwz     %r28,120(%r3)
+  lwz     %r29,124(%r3)
+  lwz     %r30,128(%r3)
+  lwz     %r31,132(%r3)
+
+  // restore float registers
+  lfd     %f0, 160(%r3)
+  lfd     %f1, 168(%r3)
+  lfd     %f2, 176(%r3)
+  lfd     %f3, 184(%r3)
+  lfd     %f4, 192(%r3)
+  lfd     %f5, 200(%r3)
+  lfd     %f6, 208(%r3)
+  lfd     %f7, 216(%r3)
+  lfd     %f8, 224(%r3)
+  lfd     %f9, 232(%r3)
+  lfd     %f10,240(%r3)
+  lfd     %f11,248(%r3)
+  lfd     %f12,256(%r3)
+  lfd     %f13,264(%r3)
+  lfd     %f14,272(%r3)
+  lfd     %f15,280(%r3)
+  lfd     %f16,288(%r3)
+  lfd     %f17,296(%r3)
+  lfd     %f18,304(%r3)
+  lfd     %f19,312(%r3)
+  lfd     %f20,320(%r3)
+  lfd     %f21,328(%r3)
+  lfd     %f22,336(%r3)
+  lfd     %f23,344(%r3)
+  lfd     %f24,352(%r3)
+  lfd     %f25,360(%r3)
+  lfd     %f26,368(%r3)
+  lfd     %f27,376(%r3)
+  lfd     %f28,384(%r3)
+  lfd     %f29,392(%r3)
+  lfd     %f30,400(%r3)
+  lfd     %f31,408(%r3)
+
+  // restore vector registers if any are in use
+  lwz     %r5, 156(%r3)       // test VRsave
+  cmpwi   %r5, 0
+  beq     Lnovec
 
+  subi    %r4, %r1, 16
+  rlwinm  %r4, %r4, 0, 0, 27  // mask low 4-bits
+  // r4 is now a 16-byte aligned pointer into the red zone
+  // the _vectorRegisters may not be 16-byte aligned so copy via red zone temp buffer
+ 
 
 #define LOAD_VECTOR_UNALIGNEDl(_index) \
-  andis.  r0,r5,(1<<(15-_index))  @\
-  beq    Ldone  ## _index     @\
-  lwz    r0, 424+_index*16(r3)  @\
-  stw    r0, 0(r4)        @\
-  lwz    r0, 424+_index*16+4(r3)  @\
-  stw    r0, 4(r4)        @\
-  lwz    r0, 424+_index*16+8(r3)  @\
-  stw    r0, 8(r4)        @\
-  lwz    r0, 424+_index*16+12(r3)@\
-  stw    r0, 12(r4)        @\
-  lvx    v ## _index,0,r4    @\
-Ldone  ## _index:
+  andis.  %r0, %r5, (1<<(15-_index))  SEPARATOR \
+  beq     Ldone ## _index             SEPARATOR \
+  lwz     %r0, 424+_index*16(%r3)     SEPARATOR \
+  stw     %r0, 0(%r4)                 SEPARATOR \
+  lwz     %r0, 424+_index*16+4(%r3)   SEPARATOR \
+  stw     %r0, 4(%r4)                 SEPARATOR \
+  lwz     %r0, 424+_index*16+8(%r3)   SEPARATOR \
+  stw     %r0, 8(%r4)                 SEPARATOR \
+  lwz     %r0, 424+_index*16+12(%r3)  SEPARATOR \
+  stw     %r0, 12(%r4)                SEPARATOR \
+  lvx     %v ## _index, 0, %r4        SEPARATOR \
+  Ldone ## _index:
 
 #define LOAD_VECTOR_UNALIGNEDh(_index) \
-  andi.  r0,r5,(1<<(31-_index))  @\
-  beq    Ldone  ## _index    @\
-  lwz    r0, 424+_index*16(r3)  @\
-  stw    r0, 0(r4)        @\
-  lwz    r0, 424+_index*16+4(r3)  @\
-  stw    r0, 4(r4)        @\
-  lwz    r0, 424+_index*16+8(r3)  @\
-  stw    r0, 8(r4)        @\
-  lwz    r0, 424+_index*16+12(r3)@\
-  stw    r0, 12(r4)        @\
-  lvx    v ## _index,0,r4    @\
-  Ldone  ## _index:
+  andi.   %r0, %r5, (1<<(31-_index))  SEPARATOR \
+  beq     Ldone ## _index             SEPARATOR \
+  lwz     %r0, 424+_index*16(%r3)     SEPARATOR \
+  stw     %r0, 0(%r4)                 SEPARATOR \
+  lwz     %r0, 424+_index*16+4(%r3)   SEPARATOR \
+  stw     %r0, 4(%r4)                 SEPARATOR \
+  lwz     %r0, 424+_index*16+8(%r3)   SEPARATOR \
+  stw     %r0, 8(%r4)                 SEPARATOR \
+  lwz     %r0, 424+_index*16+12(%r3)  SEPARATOR \
+  stw     %r0, 12(%r4)                SEPARATOR \
+  lvx     %v ## _index, 0, %r4        SEPARATOR \
+  Ldone ## _index:
 
 
   LOAD_VECTOR_UNALIGNEDl(0)
@@ -545,17 +545,17 @@ Ldone  ## _index:
   LOAD_VECTOR_UNALIGNEDh(31)
 
 Lnovec:
-  lwz    r0, 136(r3) ; __cr
-  mtocrf  255,r0
-  lwz    r0, 148(r3) ; __ctr
-  mtctr  r0
-  lwz    r0, 0(r3)  ; __ssr0
-  mtctr  r0
-  lwz    r0, 8(r3)  ; do r0 now
-  lwz    r5,28(r3)  ; do r5 now
-  lwz    r4,24(r3)  ; do r4 now
-  lwz    r1,12(r3)  ; do sp now
-  lwz    r3,20(r3)  ; do r3 last
+  lwz     %r0, 136(%r3)   // __cr
+  mtcr    %r0
+  lwz     %r0, 148(%r3)   // __ctr
+  mtctr   %r0
+  lwz     %r0,   0(%r3)   // __ssr0
+  mtctr   %r0
+  lwz     %r0,   8(%r3)   // do r0 now
+  lwz     %r5,  28(%r3)   // do r5 now
+  lwz     %r4,  24(%r3)   // do r4 now
+  lwz     %r1,  12(%r3)   // do sp now
+  lwz     %r3,  20(%r3)   // do r3 last
   bctr
 
 #elif defined(__arm64__) || defined(__aarch64__)
diff --git a/contrib/libunwind/src/UnwindRegistersSave.S b/contrib/libunwind/src/UnwindRegistersSave.S
index aa42df3f9497..b40a71fb34d4 100644
--- a/contrib/libunwind/src/UnwindRegistersSave.S
+++ b/contrib/libunwind/src/UnwindRegistersSave.S
@@ -557,144 +557,144 @@ DEFINE_LIBUNWIND_FUNCTION(unw_getcontext)
 
 #elif defined(__ppc__)
 
-;
-; extern int unw_getcontext(unw_context_t* thread_state)
-;
-; On entry:
-;  thread_state pointer is in r3
-;
+//
+// extern int unw_getcontext(unw_context_t* thread_state)
+//
+// On entry:
+//  thread_state pointer is in r3
+//
 DEFINE_LIBUNWIND_FUNCTION(unw_getcontext)
-  stw    r0,  8(r3)
-  mflr  r0
-  stw    r0,  0(r3)  ; store lr as ssr0
-  stw    r1, 12(r3)
-  stw    r2, 16(r3)
-  stw    r3, 20(r3)
-  stw    r4, 24(r3)
-  stw    r5, 28(r3)
-  stw    r6, 32(r3)
-  stw    r7, 36(r3)
-  stw    r8, 40(r3)
-  stw    r9, 44(r3)
-  stw     r10, 48(r3)
-  stw     r11, 52(r3)
-  stw     r12, 56(r3)
-  stw     r13, 60(r3)
-  stw     r14, 64(r3)
-  stw     r15, 68(r3)
-  stw     r16, 72(r3)
-  stw     r17, 76(r3)
-  stw     r18, 80(r3)
-  stw     r19, 84(r3)
-  stw     r20, 88(r3)
-  stw     r21, 92(r3)
-  stw     r22, 96(r3)
-  stw     r23,100(r3)
-  stw     r24,104(r3)
-  stw     r25,108(r3)
-  stw     r26,112(r3)
-  stw     r27,116(r3)
-  stw     r28,120(r3)
-  stw     r29,124(r3)
-  stw     r30,128(r3)
-  stw     r31,132(r3)
-
-  ; save VRSave register
-  mfspr  r0,256
-  stw    r0,156(r3)
-  ; save CR registers
-  mfcr  r0
-  stw    r0,136(r3)
-  ; save CTR register
-  mfctr  r0
-  stw    r0,148(r3)
-
-  ; save float registers
-  stfd    f0, 160(r3)
-  stfd    f1, 168(r3)
-  stfd    f2, 176(r3)
-  stfd    f3, 184(r3)
-  stfd    f4, 192(r3)
-  stfd    f5, 200(r3)
-  stfd    f6, 208(r3)
-  stfd    f7, 216(r3)
-  stfd    f8, 224(r3)
-  stfd    f9, 232(r3)
-  stfd    f10,240(r3)
-  stfd    f11,248(r3)
-  stfd    f12,256(r3)
-  stfd    f13,264(r3)
-  stfd    f14,272(r3)
-  stfd    f15,280(r3)
-  stfd    f16,288(r3)
-  stfd    f17,296(r3)
-  stfd    f18,304(r3)
-  stfd    f19,312(r3)
-  stfd    f20,320(r3)
-  stfd    f21,328(r3)
-  stfd    f22,336(r3)
-  stfd    f23,344(r3)
-  stfd    f24,352(r3)
-  stfd    f25,360(r3)
-  stfd    f26,368(r3)
-  stfd    f27,376(r3)
-  stfd    f28,384(r3)
-  stfd    f29,392(r3)
-  stfd    f30,400(r3)
-  stfd    f31,408(r3)
-
-
-  ; save vector registers
-
-  subi  r4,r1,16
-  rlwinm  r4,r4,0,0,27  ; mask low 4-bits
-  ; r4 is now a 16-byte aligned pointer into the red zone
+  stw     %r0,   8(%r3)
+  mflr    %r0
+  stw     %r0,   0(%r3) // store lr as ssr0
+  stw     %r1,  12(%r3)
+  stw     %r2,  16(%r3)
+  stw     %r3,  20(%r3)
+  stw     %r4,  24(%r3)
+  stw     %r5,  28(%r3)
+  stw     %r6,  32(%r3)
+  stw     %r7,  36(%r3)
+  stw     %r8,  40(%r3)
+  stw     %r9,  44(%r3)
+  stw     %r10, 48(%r3)
+  stw     %r11, 52(%r3)
+  stw     %r12, 56(%r3)
+  stw     %r13, 60(%r3)
+  stw     %r14, 64(%r3)
+  stw     %r15, 68(%r3)
+  stw     %r16, 72(%r3)
+  stw     %r17, 76(%r3)
+  stw     %r18, 80(%r3)
+  stw     %r19, 84(%r3)
+  stw     %r20, 88(%r3)
+  stw     %r21, 92(%r3)
+  stw     %r22, 96(%r3)
+  stw     %r23,100(%r3)
+  stw     %r24,104(%r3)
+  stw     %r25,108(%r3)
+  stw     %r26,112(%r3)
+  stw     %r27,116(%r3)
+  stw     %r28,120(%r3)
+  stw     %r29,124(%r3)
+  stw     %r30,128(%r3)
+  stw     %r31,132(%r3)
+
+  // save VRSave register
+  mfspr   %r0, 256
+  stw     %r0, 156(%r3)
+  // save CR registers
+  mfcr    %r0
+  stw     %r0, 136(%r3)
+  // save CTR register
+  mfctr   %r0
+  stw     %r0, 148(%r3)
+
+  // save float registers
+  stfd    %f0, 160(%r3)
+  stfd    %f1, 168(%r3)
+  stfd    %f2, 176(%r3)
+  stfd    %f3, 184(%r3)
+  stfd    %f4, 192(%r3)
+  stfd    %f5, 200(%r3)
+  stfd    %f6, 208(%r3)
+  stfd    %f7, 216(%r3)
+  stfd    %f8, 224(%r3)
+  stfd    %f9, 232(%r3)
+  stfd    %f10,240(%r3)
+  stfd    %f11,248(%r3)
+  stfd    %f12,256(%r3)
+  stfd    %f13,264(%r3)
+  stfd    %f14,272(%r3)
+  stfd    %f15,280(%r3)
+  stfd    %f16,288(%r3)
+  stfd    %f17,296(%r3)
+  stfd    %f18,304(%r3)
+  stfd    %f19,312(%r3)
+  stfd    %f20,320(%r3)
+  stfd    %f21,328(%r3)
+  stfd    %f22,336(%r3)
+  stfd    %f23,344(%r3)
+  stfd    %f24,352(%r3)
+  stfd    %f25,360(%r3)
+  stfd    %f26,368(%r3)
+  stfd    %f27,376(%r3)
+  stfd    %f28,384(%r3)
+  stfd    %f29,392(%r3)
+  stfd    %f30,400(%r3)
+  stfd    %f31,408(%r3)
+
+
+  // save vector registers
+
+  subi    %r4, %r1, 16
+  rlwinm  %r4, %r4, 0, 0, 27  // mask low 4-bits
+  // r4 is now a 16-byte aligned pointer into the red zone
 
 #define SAVE_VECTOR_UNALIGNED(_vec, _offset) \
-  stvx  _vec,0,r4           @\
-  lwz    r5, 0(r4)          @\
-  stw    r5, _offset(r3)    @\
-  lwz    r5, 4(r4)          @\
-  stw    r5, _offset+4(r3)  @\
-  lwz    r5, 8(r4)          @\
-  stw    r5, _offset+8(r3)  @\
-  lwz    r5, 12(r4)         @\
-  stw    r5, _offset+12(r3)
-
-  SAVE_VECTOR_UNALIGNED( v0, 424+0x000)
-  SAVE_VECTOR_UNALIGNED( v1, 424+0x010)
-  SAVE_VECTOR_UNALIGNED( v2, 424+0x020)
-  SAVE_VECTOR_UNALIGNED( v3, 424+0x030)
-  SAVE_VECTOR_UNALIGNED( v4, 424+0x040)
-  SAVE_VECTOR_UNALIGNED( v5, 424+0x050)
-  SAVE_VECTOR_UNALIGNED( v6, 424+0x060)
-  SAVE_VECTOR_UNALIGNED( v7, 424+0x070)
-  SAVE_VECTOR_UNALIGNED( v8, 424+0x080)
-  SAVE_VECTOR_UNALIGNED( v9, 424+0x090)
-  SAVE_VECTOR_UNALIGNED(v10, 424+0x0A0)
-  SAVE_VECTOR_UNALIGNED(v11, 424+0x0B0)
-  SAVE_VECTOR_UNALIGNED(v12, 424+0x0C0)
-  SAVE_VECTOR_UNALIGNED(v13, 424+0x0D0)
-  SAVE_VECTOR_UNALIGNED(v14, 424+0x0E0)
-  SAVE_VECTOR_UNALIGNED(v15, 424+0x0F0)
-  SAVE_VECTOR_UNALIGNED(v16, 424+0x100)
-  SAVE_VECTOR_UNALIGNED(v17, 424+0x110)
-  SAVE_VECTOR_UNALIGNED(v18, 424+0x120)
-  SAVE_VECTOR_UNALIGNED(v19, 424+0x130)
-  SAVE_VECTOR_UNALIGNED(v20, 424+0x140)
-  SAVE_VECTOR_UNALIGNED(v21, 424+0x150)
-  SAVE_VECTOR_UNALIGNED(v22, 424+0x160)
-  SAVE_VECTOR_UNALIGNED(v23, 424+0x170)
-  SAVE_VECTOR_UNALIGNED(v24, 424+0x180)
-  SAVE_VECTOR_UNALIGNED(v25, 424+0x190)
-  SAVE_VECTOR_UNALIGNED(v26, 424+0x1A0)
-  SAVE_VECTOR_UNALIGNED(v27, 424+0x1B0)
-  SAVE_VECTOR_UNALIGNED(v28, 424+0x1C0)
-  SAVE_VECTOR_UNALIGNED(v29, 424+0x1D0)
-  SAVE_VECTOR_UNALIGNED(v30, 424+0x1E0)
-  SAVE_VECTOR_UNALIGNED(v31, 424+0x1F0)
-
-  li  r3, 0    ; return UNW_ESUCCESS
+  stvx    _vec, 0, %r4          SEPARATOR \
+  lwz     %r5, 0(%r4)           SEPARATOR \
+  stw     %r5, _offset(%r3)     SEPARATOR \
+  lwz     %r5, 4(%r4)           SEPARATOR \
+  stw     %r5, _offset+4(%r3)   SEPARATOR \
+  lwz     %r5, 8(%r4)           SEPARATOR \
+  stw     %r5, _offset+8(%r3)   SEPARATOR \
+  lwz     %r5, 12(%r4)          SEPARATOR \
+  stw     %r5, _offset+12(%r3)
+
+  SAVE_VECTOR_UNALIGNED( %v0, 424+0x000)
+  SAVE_VECTOR_UNALIGNED( %v1, 424+0x010)
+  SAVE_VECTOR_UNALIGNED( %v2, 424+0x020)
+  SAVE_VECTOR_UNALIGNED( %v3, 424+0x030)
+  SAVE_VECTOR_UNALIGNED( %v4, 424+0x040)
+  SAVE_VECTOR_UNALIGNED( %v5, 424+0x050)
+  SAVE_VECTOR_UNALIGNED( %v6, 424+0x060)
+  SAVE_VECTOR_UNALIGNED( %v7, 424+0x070)
+  SAVE_VECTOR_UNALIGNED( %v8, 424+0x080)
+  SAVE_VECTOR_UNALIGNED( %v9, 424+0x090)
+  SAVE_VECTOR_UNALIGNED(%v10, 424+0x0A0)
+  SAVE_VECTOR_UNALIGNED(%v11, 424+0x0B0)
+  SAVE_VECTOR_UNALIGNED(%v12, 424+0x0C0)
+  SAVE_VECTOR_UNALIGNED(%v13, 424+0x0D0)
+  SAVE_VECTOR_UNALIGNED(%v14, 424+0x0E0)
+  SAVE_VECTOR_UNALIGNED(%v15, 424+0x0F0)
+  SAVE_VECTOR_UNALIGNED(%v16, 424+0x100)
+  SAVE_VECTOR_UNALIGNED(%v17, 424+0x110)
+  SAVE_VECTOR_UNALIGNED(%v18, 424+0x120)
+  SAVE_VECTOR_UNALIGNED(%v19, 424+0x130)
+  SAVE_VECTOR_UNALIGNED(%v20, 424+0x140)
+  SAVE_VECTOR_UNALIGNED(%v21, 424+0x150)
+  SAVE_VECTOR_UNALIGNED(%v22, 424+0x160)
+  SAVE_VECTOR_UNALIGNED(%v23, 424+0x170)
+  SAVE_VECTOR_UNALIGNED(%v24, 424+0x180)
+  SAVE_VECTOR_UNALIGNED(%v25, 424+0x190)
+  SAVE_VECTOR_UNALIGNED(%v26, 424+0x1A0)
+  SAVE_VECTOR_UNALIGNED(%v27, 424+0x1B0)
+  SAVE_VECTOR_UNALIGNED(%v28, 424+0x1C0)
+  SAVE_VECTOR_UNALIGNED(%v29, 424+0x1D0)
+  SAVE_VECTOR_UNALIGNED(%v30, 424+0x1E0)
+  SAVE_VECTOR_UNALIGNED(%v31, 424+0x1F0)
+
+  li      %r3, 0  // return UNW_ESUCCESS
   blr
 
 
diff --git a/contrib/libunwind/src/assembly.h b/contrib/libunwind/src/assembly.h
index 2df930214fae..7806892e9dcf 100644
--- a/contrib/libunwind/src/assembly.h
+++ b/contrib/libunwind/src/assembly.h
@@ -29,8 +29,6 @@
 #ifdef _ARCH_PWR8
 #define PPC64_HAS_VMX
 #endif
-#elif defined(__POWERPC__) || defined(__powerpc__) || defined(__ppc__)
-#define SEPARATOR @
 #elif defined(__arm64__)
 #define SEPARATOR %%
 #else
diff --git a/contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp b/contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp
index ba55ffc28174..8a88a2fa3a09 100644
--- a/contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp
+++ b/contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp
@@ -301,7 +301,7 @@ void CIE::dump(raw_ostream &OS, const MCRegisterInfo *MRI, bool IsEH) const {
   OS << format("  Data alignment factor: %d\n", (int32_t)DataAlignmentFactor);
   OS << format("  Return address column: %d\n", (int32_t)ReturnAddressRegister);
   if (Personality)
-    OS << format("  Personality Address: %08x\n", *Personality);
+    OS << format("  Personality Address: %016" PRIx64 "\n", *Personality);
   if (!AugmentationData.empty()) {
     OS << "  Augmentation data:    ";
     for (uint8_t Byte : AugmentationData)
@@ -320,7 +320,7 @@ void FDE::dump(raw_ostream &OS, const MCRegisterInfo *MRI, bool IsEH) const {
                (uint32_t)InitialLocation,
                (uint32_t)InitialLocation + (uint32_t)AddressRange);
   if (LSDAAddress)
-    OS << format("  LSDA Address: %08x\n", *LSDAAddress);
+    OS << format("  LSDA Address: %016" PRIx64 "\n", *LSDAAddress);
   CFIs.dump(OS, MRI, IsEH);
   OS << "\n";
 }
diff --git a/contrib/llvm/lib/MC/ELFObjectWriter.cpp b/contrib/llvm/lib/MC/ELFObjectWriter.cpp
index ade858113a30..1b505776ca19 100644
--- a/contrib/llvm/lib/MC/ELFObjectWriter.cpp
+++ b/contrib/llvm/lib/MC/ELFObjectWriter.cpp
@@ -1271,6 +1271,7 @@ void ELFObjectWriter::executePostLayoutBinding(MCAssembler &Asm,
     // This is the first place we are able to copy this information.
     Alias->setExternal(Symbol.isExternal());
     Alias->setBinding(Symbol.getBinding());
+    Alias->setOther(Symbol.getOther());
 
     if (!Symbol.isUndefined() && !Rest.startswith("@@@"))
       continue;
diff --git a/contrib/llvm/lib/MC/MCWin64EH.cpp b/contrib/llvm/lib/MC/MCWin64EH.cpp
index 8bc1f08c8875..3ef1514455af 100644
--- a/contrib/llvm/lib/MC/MCWin64EH.cpp
+++ b/contrib/llvm/lib/MC/MCWin64EH.cpp
@@ -522,7 +522,7 @@ static void ARM64EmitUnwindInfo(MCStreamer &streamer, WinEH::FrameInfo *info) {
     if (MatchingEpilog) {
       assert(EpilogInfo.find(MatchingEpilog) != EpilogInfo.end() &&
              "Duplicate epilog not found");
-      EpilogInfo[EpilogStart] = EpilogInfo[MatchingEpilog];
+      EpilogInfo[EpilogStart] = EpilogInfo.lookup(MatchingEpilog);
       // Clear the unwind codes in the EpilogMap, so that they don't get output
       // in the logic below.
       EpilogInstrs.clear();
diff --git a/contrib/llvm/lib/MC/WasmObjectWriter.cpp b/contrib/llvm/lib/MC/WasmObjectWriter.cpp
index 333748db9190..b07fe05cad5b 100644
--- a/contrib/llvm/lib/MC/WasmObjectWriter.cpp
+++ b/contrib/llvm/lib/MC/WasmObjectWriter.cpp
@@ -368,7 +368,13 @@ void WasmObjectWriter::startCustomSection(SectionBookkeeping &Section,
 // Now that the section is complete and we know how big it is, patch up the
 // section size field at the start of the section.
 void WasmObjectWriter::endSection(SectionBookkeeping &Section) {
-  uint64_t Size = W.OS.tell() - Section.PayloadOffset;
+  uint64_t Size = W.OS.tell();
+  // /dev/null doesn't support seek/tell and can report offset of 0.
+  // Simply skip this patching in that case.
+  if (!Size)
+    return;
+
+  Size -= Section.PayloadOffset;
   if (uint32_t(Size) != Size)
     report_fatal_error("section size does not fit in a uint32_t");
 
diff --git a/contrib/llvm/lib/Object/COFFImportFile.cpp b/contrib/llvm/lib/Object/COFFImportFile.cpp
index dc11cc4bcffe..e7c7efe43676 100644
--- a/contrib/llvm/lib/Object/COFFImportFile.cpp
+++ b/contrib/llvm/lib/Object/COFFImportFile.cpp
@@ -496,7 +496,7 @@ NewArchiveMember ObjectFactory::createWeakExternal(StringRef Sym,
 
   // COFF Header
   coff_file_header Header{
-      u16(0),
+      u16(Machine),
       u16(NumberOfSections),
       u32(0),
       u32(sizeof(Header) + (NumberOfSections * sizeof(coff_section))),
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64SchedExynosM4.td b/contrib/llvm/lib/Target/AArch64/AArch64SchedExynosM4.td
index 4d892465b3f2..61652b1d8e3d 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64SchedExynosM4.td
+++ b/contrib/llvm/lib/Target/AArch64/AArch64SchedExynosM4.td
@@ -239,7 +239,6 @@ def M4WriteNEONK   : SchedWriteRes<[M4UnitNSHF,
                                     M4UnitS0]>    { let Latency = 5;
                                                     let NumMicroOps = 2; }
 def M4WriteNEONL   : SchedWriteRes<[M4UnitNMUL]>  { let Latency = 3; }
-def M4WriteNEONM   : SchedWriteRes<[M4UnitNMUL]>  { let Latency = 3; }
 def M4WriteNEONN   : SchedWriteRes<[M4UnitNMSC,
                                     M4UnitNMSC]>  { let Latency = 5;
                                                     let NumMicroOps = 2; }
@@ -480,8 +479,6 @@ def M4WriteCOPY    : SchedWriteVariant<[SchedVar<ExynosFPPred, [M4WriteNALU1]>,
                                         SchedVar<NoSchedPred,  [M4WriteZ0]>]>;
 def M4WriteMOVI    : SchedWriteVariant<[SchedVar<IsZeroFPIdiomPred, [M4WriteZ0]>,
                                         SchedVar<NoSchedPred,       [M4WriteNALU1]>]>;
-def M4WriteMULL    : SchedWriteVariant<[SchedVar<ExynosLongVectorUpperPred, [M4WriteNEONM]>,
-                                        SchedVar<NoSchedPred,               [M4WriteNMUL3]>]>;
 
 // Fast forwarding.
 def M4ReadAESM1    : SchedReadAdvance<+1, [M4WriteNCRY1]>;
@@ -489,7 +486,8 @@ def M4ReadFMACM1   : SchedReadAdvance<+1, [M4WriteFMAC4,
                                            M4WriteFMAC4H,
                                            M4WriteFMAC5]>;
 def M4ReadNMULM1   : SchedReadAdvance<+1, [M4WriteNMUL3]>;
-def M4ReadMULLP2   : SchedReadAdvance<-2, [M4WriteNEONM]>;
+def M4ReadNMULP2   : SchedReadAdvance<-2, [M4WriteNMUL3]>;
+
 
 //===----------------------------------------------------------------------===//
 // Coarse scheduling model.
@@ -662,10 +660,8 @@ def : InstRW<[M4WriteNEONK],  (instregex "^FMOVDXHighr")>;
 def : InstRW<[M4WriteFCVT3H], (instregex "^F(RECP|RSQRT)Ev1f16")>;
 def : InstRW<[M4WriteFCVT3],  (instregex "^F(RECP|RSQRT)Ev1i(32|64)")>;
 def : InstRW<[M4WriteNMSC1],  (instregex "^FRECPXv1")>;
-def : InstRW<[M4WriteFMAC4H,
-              M4ReadFMACM1],  (instregex "^F(RECP|RSQRT)S16")>;
-def : InstRW<[M4WriteFMAC4,
-              M4ReadFMACM1],  (instregex "^F(RECP|RSQRT)S(32|64)")>;
+def : InstRW<[M4WriteFMAC4H], (instregex "^F(RECP|RSQRT)S16")>;
+def : InstRW<[M4WriteFMAC4],  (instregex "^F(RECP|RSQRT)S(32|64)")>;
 
 // FP load instructions.
 def : InstRW<[WriteVLD],    (instregex "^LDR[SDQ]l")>;
@@ -736,14 +732,20 @@ def : InstRW<[M4WriteNALU1],  (instregex "^(AND|BIC|EOR|NOT|ORN|ORR)v")>;
 def : InstRW<[M4WriteNMSC1],  (instregex "^[SU](MIN|MAX)v")>;
 def : InstRW<[M4WriteNMSC2],  (instregex "^[SU](MIN|MAX)Pv")>;
 def : InstRW<[M4WriteNHAD3],  (instregex "^[SU](MIN|MAX)Vv")>;
-def : InstRW<[M4WriteNMUL3],  (instregex "^(SQR?D)?MULH?v")>;
 def : InstRW<[M4WriteNMUL3,
               M4ReadNMULM1],  (instregex "^ML[AS]v")>;
-def : InstRW<[M4WriteNMUL3],  (instregex "^SQRDML[AS]H")>;
-def : InstRW<[M4WriteMULL,
-              M4ReadMULLP2],  (instregex "^(S|U|SQD)ML[AS]Lv")>;
-def : InstRW<[M4WriteMULL,
-              M4ReadMULLP2],  (instregex "^(S|U|SQD)MULLv")>;
+def : InstRW<[M4WriteNMUL3,
+              M4ReadNMULM1],  (instregex "^(SQR?D)?MULH?v")>;
+def : InstRW<[M4WriteNMUL3,
+              M4ReadNMULM1],  (instregex "^SQRDML[AS]H")>;
+def : InstRW<[M4WriteNMUL3,
+              M4ReadNMULM1],  (instregex "^(S|U|SQD)ML[AS]L(v1(i32|i64)|v2i32|v4i16|v8i8)")>;
+def : InstRW<[M4WriteNMUL3,
+              M4ReadNMULP2],  (instregex "^(S|U|SQD)ML[AS]L(v4i32|v8i16|v16i8)")>;
+def : InstRW<[M4WriteNMUL3,
+              M4ReadNMULM1],  (instregex "^(S|U|SQD)MULL(v1(i32|i64)|v2i32|v4i16|v8i8)")>;
+def : InstRW<[M4WriteNMUL3,
+              M4ReadNMULP2],  (instregex "^(S|U|SQD)MULL(v4i32|v8i16|v16i8)")>;
 def : InstRW<[M4WriteNMUL3],  (instregex "^[SU]DOT(lane)?v")>;
 def : InstRW<[M4WriteNHAD3],  (instregex "^[SU]ADALPv")>;
 def : InstRW<[M4WriteNSHT4A], (instregex "^[SU]R?SRA[dv]")>;
@@ -808,10 +810,8 @@ def : InstRW<[M4WriteNALU1],  (instregex "^FMOVv.f(32|64)")>;
 def : InstRW<[M4WriteFCVT3H], (instregex "^F(RECP|RSQRT)Ev[248]f16")>;
 def : InstRW<[M4WriteFCVT3],  (instregex "^F(RECP|RSQRT)Ev[248]f(32|64)")>;
 def : InstRW<[M4WriteFCVT3],  (instregex "^U(RECP|RSQRT)Ev[24]i32")>;
-def : InstRW<[M4WriteFMAC4H,
-              M4ReadFMACM1],  (instregex "^F(RECP|RSQRT)Sv.f16")>;
-def : InstRW<[M4WriteFMAC4,
-              M4ReadFMACM1],  (instregex "^F(RECP|RSQRT)Sv.f(32|64)")>;
+def : InstRW<[M4WriteFMAC4H], (instregex "^F(RECP|RSQRT)Sv.f16")>;
+def : InstRW<[M4WriteFMAC4],  (instregex "^F(RECP|RSQRT)Sv.f(32|64)")>;
 def : InstRW<[M4WriteNSHF1],  (instregex "^REV(16|32|64)v")>;
 def : InstRW<[M4WriteNSHFA],  (instregex "^TB[LX]v(8|16)i8One")>;
 def : InstRW<[M4WriteNSHFB],  (instregex "^TB[LX]v(8|16)i8Two")>;
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64SchedPredExynos.td b/contrib/llvm/lib/Target/AArch64/AArch64SchedPredExynos.td
index 48c54230e9d8..316036d89406 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64SchedPredExynos.td
+++ b/contrib/llvm/lib/Target/AArch64/AArch64SchedPredExynos.td
@@ -103,17 +103,6 @@ def ExynosScaledIdxPred : MCSchedPredicate<ExynosScaledIdxFn>;
 // Identify FP instructions.
 def ExynosFPPred : MCSchedPredicate<CheckAny<[CheckDForm, CheckQForm]>>;
 
-// Identify whether an instruction whose result is a long vector
-// operates on the upper half of the input registers.
-def ExynosLongVectorUpperFn   : TIIPredicate<
-                                  "isExynosLongVectorUpper",
-                                  MCOpcodeSwitchStatement<
-                                  [MCOpcodeSwitchCase<
-                                    IsLongVectorUpperOp.ValidOpcodes,
-                                    MCReturnStatement<TruePred>>],
-                                  MCReturnStatement<FalsePred>>>;
-def ExynosLongVectorUpperPred : MCSchedPredicate<ExynosLongVectorUpperFn>;
-
 // Identify 128-bit NEON instructions.
 def ExynosQFormPred : MCSchedPredicate<CheckQForm>;
 
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64SchedPredicates.td b/contrib/llvm/lib/Target/AArch64/AArch64SchedPredicates.td
index dbaf11fc95dd..b23572b41b9c 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64SchedPredicates.td
+++ b/contrib/llvm/lib/Target/AArch64/AArch64SchedPredicates.td
@@ -268,59 +268,6 @@ def IsStoreRegOffsetOp     : CheckOpcode<[STRBBroW, STRBBroX,
 def IsLoadStoreRegOffsetOp : CheckOpcode<!listconcat(IsLoadRegOffsetOp.ValidOpcodes,
                                                      IsStoreRegOffsetOp.ValidOpcodes)>;
 
-// Identify whether an instruction whose result is a long vector
-// operates on the upper half of the input registers.
-def IsLongVectorUpperOp    : CheckOpcode<[FCVTLv8i16, FCVTLv4i32,
-                                          FCVTNv8i16, FCVTNv4i32,
-                                          FCVTXNv4f32,
-                                          PMULLv16i8, PMULLv2i64,
-                                          RADDHNv8i16_v16i8, RADDHNv4i32_v8i16, RADDHNv2i64_v4i32,
-                                          RSHRNv16i8_shift, RSHRNv8i16_shift, RSHRNv4i32_shift,
-                                          RSUBHNv8i16_v16i8, RSUBHNv4i32_v8i16, RSUBHNv2i64_v4i32,
-                                          SABALv16i8_v8i16, SABALv8i16_v4i32, SABALv4i32_v2i64,
-                                          SABDLv16i8_v8i16, SABDLv8i16_v4i32, SABDLv4i32_v2i64,
-                                          SADDLv16i8_v8i16, SADDLv8i16_v4i32, SADDLv4i32_v2i64,
-                                          SADDWv16i8_v8i16, SADDWv8i16_v4i32, SADDWv4i32_v2i64,
-                                          SHLLv16i8, SHLLv8i16, SHLLv4i32,
-                                          SHRNv16i8_shift, SHRNv8i16_shift, SHRNv4i32_shift,
-                                          SMLALv16i8_v8i16, SMLALv8i16_v4i32, SMLALv4i32_v2i64,
-                                          SMLALv8i16_indexed, SMLALv4i32_indexed,
-                                          SMLSLv16i8_v8i16, SMLSLv8i16_v4i32, SMLSLv4i32_v2i64,
-                                          SMLSLv8i16_indexed, SMLSLv4i32_indexed,
-                                          SMULLv16i8_v8i16, SMULLv8i16_v4i32, SMULLv4i32_v2i64,
-                                          SMULLv8i16_indexed, SMULLv4i32_indexed,
-                                          SQDMLALv8i16_v4i32, SQDMLALv4i32_v2i64,
-                                          SQDMLALv8i16_indexed, SQDMLALv4i32_indexed,
-                                          SQDMLSLv8i16_v4i32, SQDMLSLv4i32_v2i64,
-                                          SQDMLSLv8i16_indexed, SQDMLSLv4i32_indexed,
-                                          SQDMULLv8i16_v4i32, SQDMULLv4i32_v2i64,
-                                          SQDMULLv8i16_indexed, SQDMULLv4i32_indexed,
-                                          SQRSHRNv16i8_shift, SQRSHRNv8i16_shift, SQRSHRNv4i32_shift,
-                                          SQRSHRUNv16i8_shift, SQRSHRUNv8i16_shift, SQRSHRUNv4i32_shift,
-                                          SQSHRNv16i8_shift, SQSHRNv8i16_shift, SQSHRNv4i32_shift,
-                                          SQSHRUNv16i8_shift, SQSHRUNv8i16_shift, SQSHRUNv4i32_shift,
-                                          SQXTNv16i8, SQXTNv8i16, SQXTNv4i32,
-                                          SQXTUNv16i8, SQXTUNv8i16, SQXTUNv4i32,
-                                          SSHLLv16i8_shift, SSHLLv8i16_shift, SSHLLv4i32_shift,
-                                          SSUBLv16i8_v8i16, SSUBLv8i16_v4i32, SSUBLv4i32_v2i64,
-                                          SSUBWv16i8_v8i16, SSUBWv8i16_v4i32, SSUBWv4i32_v2i64,
-                                          UABALv16i8_v8i16, UABALv8i16_v4i32, UABALv4i32_v2i64,
-                                          UABDLv16i8_v8i16, UABDLv8i16_v4i32, UABDLv4i32_v2i64,
-                                          UADDLv16i8_v8i16, UADDLv8i16_v4i32, UADDLv4i32_v2i64,
-                                          UADDWv16i8_v8i16, UADDWv8i16_v4i32, UADDWv4i32_v2i64,
-                                          UMLALv16i8_v8i16, UMLALv8i16_v4i32, UMLALv4i32_v2i64,
-                                          UMLALv8i16_indexed, UMLALv4i32_indexed,
-                                          UMLSLv16i8_v8i16, UMLSLv8i16_v4i32, UMLSLv4i32_v2i64,
-                                          UMLSLv8i16_indexed, UMLSLv4i32_indexed,
-                                          UMULLv16i8_v8i16, UMULLv8i16_v4i32, UMULLv4i32_v2i64,
-                                          UMULLv8i16_indexed, UMULLv4i32_indexed,
-                                          UQSHRNv16i8_shift, UQSHRNv8i16_shift, UQSHRNv4i32_shift,
-                                          UQXTNv16i8, UQXTNv8i16, UQXTNv4i32,
-                                          USHLLv16i8_shift, USHLLv8i16_shift, USHLLv4i32_shift,
-                                          USUBLv16i8_v8i16, USUBLv8i16_v4i32, USUBLv4i32_v2i64,
-                                          USUBWv16i8_v8i16, USUBWv8i16_v4i32, USUBWv4i32_v2i64,
-                                          XTNv16i8, XTNv8i16, XTNv4i32]>;
-
 // Target predicates.
 
 // Identify an instruction that effectively transfers a register to another.
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/contrib/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index f4e866958369..d679abd107d2 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -201,49 +201,55 @@ static bool updateOperand(FoldCandidate &Fold,
         Mod.setImm(Mod.getImm() & ~SISrcMods::OP_SEL_1);
       }
     }
+  }
 
-    if (Fold.needsShrink()) {
-      MachineBasicBlock *MBB = MI->getParent();
-      auto Liveness = MBB->computeRegisterLiveness(&TRI, AMDGPU::VCC, MI);
-      if (Liveness != MachineBasicBlock::LQR_Dead)
-        return false;
-
-      MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
-      int Op32 = Fold.getShrinkOpcode();
-      MachineOperand &Dst0 = MI->getOperand(0);
-      MachineOperand &Dst1 = MI->getOperand(1);
-      assert(Dst0.isDef() && Dst1.isDef());
-
-      bool HaveNonDbgCarryUse = !MRI.use_nodbg_empty(Dst1.getReg());
+  if ((Fold.isImm() || Fold.isFI()) && Fold.needsShrink()) {
+    MachineBasicBlock *MBB = MI->getParent();
+    auto Liveness = MBB->computeRegisterLiveness(&TRI, AMDGPU::VCC, MI);
+    if (Liveness != MachineBasicBlock::LQR_Dead)
+      return false;
 
-      const TargetRegisterClass *Dst0RC = MRI.getRegClass(Dst0.getReg());
-      unsigned NewReg0 = MRI.createVirtualRegister(Dst0RC);
-      const TargetRegisterClass *Dst1RC = MRI.getRegClass(Dst1.getReg());
-      unsigned NewReg1 = MRI.createVirtualRegister(Dst1RC);
+    MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
+    int Op32 = Fold.getShrinkOpcode();
+    MachineOperand &Dst0 = MI->getOperand(0);
+    MachineOperand &Dst1 = MI->getOperand(1);
+    assert(Dst0.isDef() && Dst1.isDef());
 
-      MachineInstr *Inst32 = TII.buildShrunkInst(*MI, Op32);
+    bool HaveNonDbgCarryUse = !MRI.use_nodbg_empty(Dst1.getReg());
 
-      if (HaveNonDbgCarryUse) {
-        BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::COPY), Dst1.getReg())
-          .addReg(AMDGPU::VCC, RegState::Kill);
-      }
+    const TargetRegisterClass *Dst0RC = MRI.getRegClass(Dst0.getReg());
+    unsigned NewReg0 = MRI.createVirtualRegister(Dst0RC);
 
-      // Keep the old instruction around to avoid breaking iterators, but
-      // replace the outputs with dummy registers.
-      Dst0.setReg(NewReg0);
-      Dst1.setReg(NewReg1);
+    MachineInstr *Inst32 = TII.buildShrunkInst(*MI, Op32);
 
-      if (Fold.isCommuted())
-        TII.commuteInstruction(*Inst32, false);
-      return true;
+    if (HaveNonDbgCarryUse) {
+      BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::COPY), Dst1.getReg())
+        .addReg(AMDGPU::VCC, RegState::Kill);
     }
 
-    Old.ChangeToImmediate(Fold.ImmToFold);
+    // Keep the old instruction around to avoid breaking iterators, but
+    // replace it with a dummy instruction to remove uses.
+    //
+    // FIXME: We should not invert how this pass looks at operands to avoid
+    // this. Should track set of foldable movs instead of looking for uses
+    // when looking at a use.
+    Dst0.setReg(NewReg0);
+    for (unsigned I = MI->getNumOperands() - 1; I > 0; --I)
+      MI->RemoveOperand(I);
+    MI->setDesc(TII.get(AMDGPU::IMPLICIT_DEF));
+
+    if (Fold.isCommuted())
+      TII.commuteInstruction(*Inst32, false);
     return true;
   }
 
   assert(!Fold.needsShrink() && "not handled");
 
+  if (Fold.isImm()) {
+    Old.ChangeToImmediate(Fold.ImmToFold);
+    return true;
+  }
+
   if (Fold.isFI()) {
     Old.ChangeToFrameIndex(Fold.FrameIndexToFold);
     return true;
@@ -344,7 +350,7 @@ static bool tryAddToFoldList(SmallVectorImpl<FoldCandidate> &FoldList,
       if ((Opc == AMDGPU::V_ADD_I32_e64 ||
            Opc == AMDGPU::V_SUB_I32_e64 ||
            Opc == AMDGPU::V_SUBREV_I32_e64) && // FIXME
-          OpToFold->isImm()) {
+          (OpToFold->isImm() || OpToFold->isFI())) {
         MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
 
         // Verify the other operand is a VGPR, otherwise we would violate the
@@ -357,7 +363,10 @@ static bool tryAddToFoldList(SmallVectorImpl<FoldCandidate> &FoldList,
 
         assert(MI->getOperand(1).isDef());
 
-        int Op32 =  AMDGPU::getVOPe32(Opc);
+        // Make sure to get the 32-bit version of the commuted opcode.
+        unsigned MaybeCommutedOpc = MI->getOpcode();
+        int Op32 = AMDGPU::getVOPe32(MaybeCommutedOpc);
+
         FoldList.push_back(FoldCandidate(MI, CommuteOpNo, OpToFold, true,
                                          Op32));
         return true;
diff --git a/contrib/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/contrib/llvm/lib/Target/AMDGPU/VOP2Instructions.td
index e3fd7b5f9fad..8cf524a5128d 100644
--- a/contrib/llvm/lib/Target/AMDGPU/VOP2Instructions.td
+++ b/contrib/llvm/lib/Target/AMDGPU/VOP2Instructions.td
@@ -515,18 +515,12 @@ let AddedComplexity = 1 in {
 }
 
 let SubtargetPredicate = HasAddNoCarryInsts in {
-  def : DivergentBinOp<add, V_ADD_U32_e32>;
-  def : DivergentBinOp<sub, V_SUB_U32_e32>;
-  def : DivergentBinOp<sub, V_SUBREV_U32_e32>;
+  def : DivergentBinOp<add, V_ADD_U32_e64>;
+  def : DivergentBinOp<sub, V_SUB_U32_e64>;
 }
 
-
-def : DivergentBinOp<add, V_ADD_I32_e32>;
-
 def : DivergentBinOp<add, V_ADD_I32_e64>;
-def : DivergentBinOp<sub, V_SUB_I32_e32>;
-
-def : DivergentBinOp<sub, V_SUBREV_I32_e32>;
+def : DivergentBinOp<sub, V_SUB_I32_e64>;
 
 def : DivergentBinOp<srl, V_LSHRREV_B32_e32>;
 def : DivergentBinOp<sra, V_ASHRREV_I32_e32>;
diff --git a/contrib/llvm/lib/Target/AVR/AVRISelLowering.cpp b/contrib/llvm/lib/Target/AVR/AVRISelLowering.cpp
index 57fc978b54bb..5db757782322 100644
--- a/contrib/llvm/lib/Target/AVR/AVRISelLowering.cpp
+++ b/contrib/llvm/lib/Target/AVR/AVRISelLowering.cpp
@@ -26,19 +26,21 @@
 
 #include "AVR.h"
 #include "AVRMachineFunctionInfo.h"
+#include "AVRSubtarget.h"
 #include "AVRTargetMachine.h"
 #include "MCTargetDesc/AVRMCTargetDesc.h"
 
 namespace llvm {
 
-AVRTargetLowering::AVRTargetLowering(AVRTargetMachine &tm)
-    : TargetLowering(tm) {
+AVRTargetLowering::AVRTargetLowering(const AVRTargetMachine &TM,
+                                     const AVRSubtarget &STI)
+    : TargetLowering(TM), Subtarget(STI) {
   // Set up the register classes.
   addRegisterClass(MVT::i8, &AVR::GPR8RegClass);
   addRegisterClass(MVT::i16, &AVR::DREGSRegClass);
 
   // Compute derived properties from the register classes.
-  computeRegisterProperties(tm.getSubtargetImpl()->getRegisterInfo());
+  computeRegisterProperties(Subtarget.getRegisterInfo());
 
   setBooleanContents(ZeroOrOneBooleanContent);
   setBooleanVectorContents(ZeroOrOneBooleanContent);
@@ -163,6 +165,13 @@ AVRTargetLowering::AVRTargetLowering(AVRTargetMachine &tm)
   setOperationAction(ISD::SMUL_LOHI, MVT::i16, Expand);
   setOperationAction(ISD::UMUL_LOHI, MVT::i16, Expand);
 
+  // Expand multiplications to libcalls when there is
+  // no hardware MUL.
+  if (!Subtarget.supportsMultiplication()) {
+    setOperationAction(ISD::SMUL_LOHI, MVT::i8, Expand);
+    setOperationAction(ISD::UMUL_LOHI, MVT::i8, Expand);
+  }
+
   for (MVT VT : MVT::integer_valuetypes()) {
     setOperationAction(ISD::MULHS, VT, Expand);
     setOperationAction(ISD::MULHU, VT, Expand);
@@ -1271,7 +1280,7 @@ SDValue AVRTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
 
   // Add a register mask operand representing the call-preserved registers.
   const AVRTargetMachine &TM = (const AVRTargetMachine &)getTargetMachine();
-  const TargetRegisterInfo *TRI = TM.getSubtargetImpl()->getRegisterInfo();
+  const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
   const uint32_t *Mask =
       TRI->getCallPreservedMask(DAG.getMachineFunction(), CallConv);
   assert(Mask && "Missing call preserved mask for calling convention");
@@ -1434,7 +1443,7 @@ MachineBasicBlock *AVRTargetLowering::insertShift(MachineInstr &MI,
   MachineFunction *F = BB->getParent();
   MachineRegisterInfo &RI = F->getRegInfo();
   const AVRTargetMachine &TM = (const AVRTargetMachine &)getTargetMachine();
-  const TargetInstrInfo &TII = *TM.getSubtargetImpl()->getInstrInfo();
+  const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
   DebugLoc dl = MI.getDebugLoc();
 
   switch (MI.getOpcode()) {
@@ -1575,7 +1584,7 @@ static bool isCopyMulResult(MachineBasicBlock::iterator const &I) {
 MachineBasicBlock *AVRTargetLowering::insertMul(MachineInstr &MI,
                                                 MachineBasicBlock *BB) const {
   const AVRTargetMachine &TM = (const AVRTargetMachine &)getTargetMachine();
-  const TargetInstrInfo &TII = *TM.getSubtargetImpl()->getInstrInfo();
+  const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
   MachineBasicBlock::iterator I(MI);
   ++I; // in any case insert *after* the mul instruction
   if (isCopyMulResult(I))
@@ -1838,9 +1847,6 @@ std::pair<unsigned, const TargetRegisterClass *>
 AVRTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                                                 StringRef Constraint,
                                                 MVT VT) const {
-  auto STI = static_cast<const AVRTargetMachine &>(this->getTargetMachine())
-                 .getSubtargetImpl();
-
   // We only support i8 and i16.
   //
   //:FIXME: remove this assert for now since it gets sometimes executed
@@ -1884,8 +1890,8 @@ AVRTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
     }
   }
 
-  return TargetLowering::getRegForInlineAsmConstraint(STI->getRegisterInfo(),
-                                                      Constraint, VT);
+  return TargetLowering::getRegForInlineAsmConstraint(
+      Subtarget.getRegisterInfo(), Constraint, VT);
 }
 
 void AVRTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
diff --git a/contrib/llvm/lib/Target/AVR/AVRISelLowering.h b/contrib/llvm/lib/Target/AVR/AVRISelLowering.h
index c90c65c81f70..7d77dd8fb018 100644
--- a/contrib/llvm/lib/Target/AVR/AVRISelLowering.h
+++ b/contrib/llvm/lib/Target/AVR/AVRISelLowering.h
@@ -64,12 +64,14 @@ enum NodeType {
 
 } // end of namespace AVRISD
 
+class AVRSubtarget;
 class AVRTargetMachine;
 
 /// Performs target lowering for the AVR.
 class AVRTargetLowering : public TargetLowering {
 public:
-  explicit AVRTargetLowering(AVRTargetMachine &TM);
+  explicit AVRTargetLowering(const AVRTargetMachine &TM,
+                             const AVRSubtarget &STI);
 
 public:
   MVT getScalarShiftAmountTy(const DataLayout &, EVT LHSTy) const override {
@@ -164,6 +166,10 @@ private:
                           const SDLoc &dl, SelectionDAG &DAG,
                           SmallVectorImpl<SDValue> &InVals) const;
 
+protected:
+
+  const AVRSubtarget &Subtarget;
+
 private:
   MachineBasicBlock *insertShift(MachineInstr &MI, MachineBasicBlock *BB) const;
   MachineBasicBlock *insertMul(MachineInstr &MI, MachineBasicBlock *BB) const;
diff --git a/contrib/llvm/lib/Target/AVR/AVRSubtarget.cpp b/contrib/llvm/lib/Target/AVR/AVRSubtarget.cpp
index 556d69ec5234..c7c566270f43 100644
--- a/contrib/llvm/lib/Target/AVR/AVRSubtarget.cpp
+++ b/contrib/llvm/lib/Target/AVR/AVRSubtarget.cpp
@@ -29,9 +29,9 @@
 namespace llvm {
 
 AVRSubtarget::AVRSubtarget(const Triple &TT, const std::string &CPU,
-                           const std::string &FS, AVRTargetMachine &TM)
+                           const std::string &FS, const AVRTargetMachine &TM)
     : AVRGenSubtargetInfo(TT, CPU, FS), InstrInfo(), FrameLowering(),
-      TLInfo(TM), TSInfo(),
+      TLInfo(TM, initializeSubtargetDependencies(CPU, FS, TM)), TSInfo(),
 
       // Subtarget features
       m_hasSRAM(false), m_hasJMPCALL(false), m_hasIJMPCALL(false),
@@ -44,4 +44,12 @@ AVRSubtarget::AVRSubtarget(const Triple &TT, const std::string &CPU,
   ParseSubtargetFeatures(CPU, FS);
 }
 
+AVRSubtarget &
+AVRSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS,
+                                              const TargetMachine &TM) {
+  // Parse features string.
+  ParseSubtargetFeatures(CPU, FS);
+  return *this;
+}
+
 } // end of namespace llvm
diff --git a/contrib/llvm/lib/Target/AVR/AVRSubtarget.h b/contrib/llvm/lib/Target/AVR/AVRSubtarget.h
index fa26738da190..ba036d5e4061 100644
--- a/contrib/llvm/lib/Target/AVR/AVRSubtarget.h
+++ b/contrib/llvm/lib/Target/AVR/AVRSubtarget.h
@@ -37,7 +37,7 @@ public:
   //! \param FS  The feature string.
   //! \param TM  The target machine.
   AVRSubtarget(const Triple &TT, const std::string &CPU, const std::string &FS,
-               AVRTargetMachine &TM);
+               const AVRTargetMachine &TM);
 
   const AVRInstrInfo *getInstrInfo() const override { return &InstrInfo; }
   const TargetFrameLowering *getFrameLowering() const override { return &FrameLowering; }
@@ -49,6 +49,9 @@ public:
   /// \note Definition of function is auto generated by `tblgen`.
   void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
 
+  AVRSubtarget &initializeSubtargetDependencies(StringRef CPU, StringRef FS,
+                                                const TargetMachine &TM);
+
   // Subtarget feature getters.
   // See AVR.td for details.
   bool hasSRAM() const { return m_hasSRAM; }
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp
index 58f9717e1cc6..a46f84bd1c9c 100644
--- a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp
@@ -700,8 +700,11 @@ void MipsTargetAsmStreamer::emitDirectiveCpreturn(unsigned SaveLocation,
 }
 
 void MipsTargetAsmStreamer::emitDirectiveModuleFP() {
-  OS << "\t.module\tfp=";
-  OS << ABIFlagsSection.getFpABIString(ABIFlagsSection.getFpABI()) << "\n";
+  MipsABIFlagsSection::FpABIKind FpABI = ABIFlagsSection.getFpABI();
+  if (FpABI == MipsABIFlagsSection::FpABIKind::SOFT)
+    OS << "\t.module\tsoftfloat\n";
+  else
+    OS << "\t.module\tfp=" << ABIFlagsSection.getFpABIString(FpABI) << "\n";
 }
 
 void MipsTargetAsmStreamer::emitDirectiveSetFp(
diff --git a/contrib/llvm/lib/Target/Mips/MicroMips32r6InstrInfo.td b/contrib/llvm/lib/Target/Mips/MicroMips32r6InstrInfo.td
index c441aa76ad40..994a8882f942 100644
--- a/contrib/llvm/lib/Target/Mips/MicroMips32r6InstrInfo.td
+++ b/contrib/llvm/lib/Target/Mips/MicroMips32r6InstrInfo.td
@@ -1040,7 +1040,7 @@ class TRUNC_L_D_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"trunc.l.d", FGR64Opnd,
 class TRUNC_W_S_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"trunc.w.s", FGR32Opnd,
                                                     FGR32Opnd, II_TRUNC>;
 class TRUNC_W_D_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"trunc.w.d", FGR32Opnd,
-                                                    AFGR64Opnd, II_TRUNC>;
+                                                    FGR64Opnd, II_TRUNC>;
 class SQRT_S_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"sqrt.s", FGR32Opnd, FGR32Opnd,
                                                  II_SQRT_S, fsqrt>;
 class SQRT_D_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"sqrt.d", AFGR64Opnd, AFGR64Opnd,
@@ -1750,6 +1750,8 @@ def : MipsPat<(f32 fpimm0), (MTC1_MMR6 ZERO)>, ISA_MICROMIPS32R6;
 def : MipsPat<(f32 fpimm0neg), (FNEG_S_MMR6 (MTC1_MMR6 ZERO))>, ISA_MICROMIPS32R6;
 def : MipsPat<(MipsTruncIntFP FGR64Opnd:$src),
               (TRUNC_W_D_MMR6 FGR64Opnd:$src)>, ISA_MICROMIPS32R6;
+def : MipsPat<(MipsTruncIntFP FGR32Opnd:$src),
+              (TRUNC_W_S_MMR6 FGR32Opnd:$src)>, ISA_MICROMIPS32R6;
 
 def : MipsPat<(and GPRMM16:$src, immZExtAndi16:$imm),
               (ANDI16_MMR6 GPRMM16:$src, immZExtAndi16:$imm)>,
diff --git a/contrib/llvm/lib/Target/Mips/MicroMipsInstrFPU.td b/contrib/llvm/lib/Target/Mips/MicroMipsInstrFPU.td
index 1731afc1961f..9e76165e7ad7 100644
--- a/contrib/llvm/lib/Target/Mips/MicroMipsInstrFPU.td
+++ b/contrib/llvm/lib/Target/Mips/MicroMipsInstrFPU.td
@@ -425,6 +425,11 @@ def : MipsPat<(f64 (fpextend FGR32Opnd:$src)),
 def : MipsPat<(MipsTruncIntFP AFGR64Opnd:$src),
               (TRUNC_W_MM AFGR64Opnd:$src)>, ISA_MICROMIPS32_NOT_MIPS32R6,
               FGR_32;
+def : MipsPat<(MipsTruncIntFP FGR64Opnd:$src),
+              (CVT_W_D64_MM FGR64Opnd:$src)>, ISA_MICROMIPS32_NOT_MIPS32R6,
+              FGR_64;
+def : MipsPat<(MipsTruncIntFP FGR32Opnd:$src),
+              (TRUNC_W_S_MM FGR32Opnd:$src)>, ISA_MICROMIPS32_NOT_MIPS32R6;
 
 // Selects
 defm : MovzPats0<GPR32, FGR32, MOVZ_I_S_MM, SLT_MM, SLTu_MM, SLTi_MM, SLTiu_MM>,
diff --git a/contrib/llvm/lib/Target/Mips/MipsAsmPrinter.cpp b/contrib/llvm/lib/Target/Mips/MipsAsmPrinter.cpp
index a7a748b0840e..c35f5beb6880 100644
--- a/contrib/llvm/lib/Target/Mips/MipsAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsAsmPrinter.cpp
@@ -813,7 +813,8 @@ void MipsAsmPrinter::EmitStartOfAsmFile(Module &M) {
   // We should always emit a '.module fp=...' but binutils 2.24 does not accept
   // it. We therefore emit it when it contradicts the ABI defaults (-mfpxx or
   // -mfp64) and omit it otherwise.
-  if (ABI.IsO32() && (STI.isABI_FPXX() || STI.isFP64bit()))
+  if ((ABI.IsO32() && (STI.isABI_FPXX() || STI.isFP64bit())) ||
+      STI.useSoftFloat())
     TS.emitDirectiveModuleFP();
 
   // We should always emit a '.module [no]oddspreg' but binutils 2.24 does not
diff --git a/contrib/llvm/lib/Target/Mips/MipsDSPInstrInfo.td b/contrib/llvm/lib/Target/Mips/MipsDSPInstrInfo.td
index b9824220b558..a4078026e4f9 100644
--- a/contrib/llvm/lib/Target/Mips/MipsDSPInstrInfo.td
+++ b/contrib/llvm/lib/Target/Mips/MipsDSPInstrInfo.td
@@ -1314,7 +1314,9 @@ def PseudoCMPU_LE_QB : PseudoCMP<CMPU_LE_QB>;
 def PseudoPICK_PH : PseudoPICK<PICK_PH>;
 def PseudoPICK_QB : PseudoPICK<PICK_QB>;
 
-def PseudoMTLOHI_DSP : PseudoMTLOHI<ACC64DSP, GPR32>;
+let AdditionalPredicates = [HasDSP] in {
+  def PseudoMTLOHI_DSP : PseudoMTLOHI<ACC64DSP, GPR32>;
+}
 
 // Patterns.
 class DSPPat<dag pattern, dag result, Predicate pred = HasDSP> :
diff --git a/contrib/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp b/contrib/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp
index e3823e0dfdb8..61e77fbeea6d 100644
--- a/contrib/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp
@@ -726,6 +726,7 @@ bool MipsDelaySlotFiller::searchRange(MachineBasicBlock &MBB, IterTy Begin,
     // but we don't have enough information to make that decision.
      if (InMicroMipsMode && TII->getInstSizeInBytes(*CurrI) == 2 &&
         (Opcode == Mips::JR || Opcode == Mips::PseudoIndirectBranch ||
+         Opcode == Mips::PseudoIndirectBranch_MM ||
          Opcode == Mips::PseudoReturn || Opcode == Mips::TAILCALL))
       continue;
      // Instructions LWP/SWP and MOVEP should not be in a delay slot as that
diff --git a/contrib/llvm/lib/Target/Mips/MipsFastISel.cpp b/contrib/llvm/lib/Target/Mips/MipsFastISel.cpp
index a18416b9e861..168750b2cba9 100644
--- a/contrib/llvm/lib/Target/Mips/MipsFastISel.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsFastISel.cpp
@@ -954,21 +954,34 @@ bool MipsFastISel::selectBranch(const Instruction *I) {
   //
   MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
   MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
-  // For now, just try the simplest case where it's fed by a compare.
+
+  // Fold the common case of a conditional branch with a comparison
+  // in the same block.
+  unsigned ZExtCondReg = 0;
   if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
-    MVT CIMVT =
-        TLI.getValueType(DL, CI->getOperand(0)->getType(), true).getSimpleVT();
-    if (CIMVT == MVT::i1)
+    if (CI->hasOneUse() && CI->getParent() == I->getParent()) {
+      ZExtCondReg = createResultReg(&Mips::GPR32RegClass);
+      if (!emitCmp(ZExtCondReg, CI))
+        return false;
+    }
+  }
+
+  // For the general case, we need to mask with 1.
+  if (ZExtCondReg == 0) {
+    unsigned CondReg = getRegForValue(BI->getCondition());
+    if (CondReg == 0)
       return false;
 
-    unsigned CondReg = getRegForValue(CI);
-    BuildMI(*BrBB, FuncInfo.InsertPt, DbgLoc, TII.get(Mips::BGTZ))
-        .addReg(CondReg)
-        .addMBB(TBB);
-    finishCondBranch(BI->getParent(), TBB, FBB);
-    return true;
+    ZExtCondReg = emitIntExt(MVT::i1, CondReg, MVT::i32, true);
+    if (ZExtCondReg == 0)
+      return false;
   }
-  return false;
+
+  BuildMI(*BrBB, FuncInfo.InsertPt, DbgLoc, TII.get(Mips::BGTZ))
+      .addReg(ZExtCondReg)
+      .addMBB(TBB);
+  finishCondBranch(BI->getParent(), TBB, FBB);
+  return true;
 }
 
 bool MipsFastISel::selectCmp(const Instruction *I) {
diff --git a/contrib/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp b/contrib/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp
index c7ab90ed2a3b..2b26caaa9f49 100644
--- a/contrib/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp
@@ -447,6 +447,9 @@ bool MipsSEInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
   case Mips::PseudoMTLOHI_DSP:
     expandPseudoMTLoHi(MBB, MI, Mips::MTLO_DSP, Mips::MTHI_DSP, true);
     break;
+  case Mips::PseudoMTLOHI_MM:
+    expandPseudoMTLoHi(MBB, MI, Mips::MTLO_MM, Mips::MTHI_MM, false);
+    break;
   case Mips::PseudoCVT_S_W:
     expandCvtFPInt(MBB, MI, Mips::CVT_S_W, Mips::MTC1, false);
     break;
diff --git a/contrib/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp b/contrib/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
index 26869f250823..cce239cac970 100644
--- a/contrib/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
@@ -61,6 +61,14 @@ extern "C" void LLVMInitializePowerPCDisassembler() {
                                          createPPCLEDisassembler);
 }
 
+static DecodeStatus DecodePCRel24BranchTarget(MCInst &Inst, unsigned Imm,
+                                              uint64_t Addr,
+                                              const void *Decoder) {
+  int32_t Offset = SignExtend32<24>(Imm);
+  Inst.addOperand(MCOperand::createImm(Offset));
+  return MCDisassembler::Success;
+}
+
 // FIXME: These can be generated by TableGen from the existing register
 // encoding values!
 
diff --git a/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp b/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
index fc29e4effbb1..6824168b890d 100644
--- a/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
@@ -382,8 +382,11 @@ void PPCInstPrinter::printBranchOperand(const MCInst *MI, unsigned OpNo,
 
   // Branches can take an immediate operand.  This is used by the branch
   // selection pass to print .+8, an eight byte displacement from the PC.
-  O << ".+";
-  printAbsBranchOperand(MI, OpNo, O);
+  O << ".";
+  int32_t Imm = SignExtend32<32>((unsigned)MI->getOperand(OpNo).getImm() << 2);
+  if (Imm >= 0)
+    O << "+";
+  O << Imm;
 }
 
 void PPCInstPrinter::printAbsBranchOperand(const MCInst *MI, unsigned OpNo,
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
index a1e4e07b25af..78609ef3d4e0 100644
--- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
@@ -15,6 +15,7 @@
 #include "InstPrinter/PPCInstPrinter.h"
 #include "MCTargetDesc/PPCMCAsmInfo.h"
 #include "PPCTargetStreamer.h"
+#include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/Triple.h"
 #include "llvm/BinaryFormat/ELF.h"
@@ -182,16 +183,33 @@ public:
 
   void emitAssignment(MCSymbol *S, const MCExpr *Value) override {
     auto *Symbol = cast<MCSymbolELF>(S);
+
     // When encoding an assignment to set symbol A to symbol B, also copy
     // the st_other bits encoding the local entry point offset.
-    if (Value->getKind() != MCExpr::SymbolRef)
-      return;
-    const auto &RhsSym = cast<MCSymbolELF>(
-        static_cast<const MCSymbolRefExpr *>(Value)->getSymbol());
-    unsigned Other = Symbol->getOther();
+    if (copyLocalEntry(Symbol, Value))
+      UpdateOther.insert(Symbol);
+    else
+      UpdateOther.erase(Symbol);
+  }
+
+  void finish() override {
+    for (auto *Sym : UpdateOther)
+      copyLocalEntry(Sym, Sym->getVariableValue());
+  }
+
+private:
+  SmallPtrSet<MCSymbolELF *, 32> UpdateOther;
+
+  bool copyLocalEntry(MCSymbolELF *D, const MCExpr *S) {
+    auto *Ref = dyn_cast<const MCSymbolRefExpr>(S);
+    if (!Ref)
+      return false;
+    const auto &RhsSym = cast<MCSymbolELF>(Ref->getSymbol());
+    unsigned Other = D->getOther();
     Other &= ~ELF::STO_PPC64_LOCAL_MASK;
     Other |= RhsSym.getOther() & ELF::STO_PPC64_LOCAL_MASK;
-    Symbol->setOther(Other);
+    D->setOther(Other);
+    return true;
   }
 };
 
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 31acd0ff870f..70e9049a2ab3 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -4359,8 +4359,8 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
     const Module *M = MF->getFunction().getParent();
 
     if (PPCLowering->getPointerTy(CurDAG->getDataLayout()) != MVT::i32 ||
-        !PPCSubTarget->isSecurePlt() || !PPCSubTarget->isTargetELF() ||
-        M->getPICLevel() == PICLevel::SmallPIC)
+        (!TM.isPositionIndependent() || !PPCSubTarget->isSecurePlt()) ||
+        !PPCSubTarget->isTargetELF() || M->getPICLevel() == PICLevel::SmallPIC)
       break;
 
     SDValue Op = N->getOperand(1);
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td
index dd3f1ac79089..77aa4fe3d415 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -737,7 +737,9 @@ def abscondbrtarget : Operand<OtherVT> {
 def calltarget : Operand<iPTR> {
   let PrintMethod = "printBranchOperand";
   let EncoderMethod = "getDirectBrEncoding";
+  let DecoderMethod = "DecodePCRel24BranchTarget";
   let ParserMatchClass = PPCDirectBrAsmOperand;
+  let OperandType = "OPERAND_PCREL";
 }
 def abscalltarget : Operand<iPTR> {
   let PrintMethod = "printAbsBranchOperand";
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.cpp b/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
index c0cbfd779cb9..1fdf74549dec 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
@@ -138,6 +138,9 @@ void PPCSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
   if (isDarwin())
     HasLazyResolverStubs = true;
 
+  if (TargetTriple.isOSNetBSD() || TargetTriple.isOSOpenBSD())
+    SecurePlt = true;
+
   if (HasSPE && IsPPC64)
     report_fatal_error( "SPE is only supported for 32-bit targets.\n", false);
   if (HasSPE && (HasAltivec || HasQPX || HasVSX || HasFPU))
diff --git a/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.cpp b/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.cpp
index 33caa66154ff..ad6ea3760fee 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.cpp
@@ -189,7 +189,7 @@ SparcRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
       MachineInstr *StMI =
         BuildMI(*MI.getParent(), II, dl, TII.get(SP::STDFri))
         .addReg(FrameReg).addImm(0).addReg(SrcEvenReg);
-      replaceFI(MF, II, *StMI, dl, 0, Offset, FrameReg);
+      replaceFI(MF, *StMI, *StMI, dl, 0, Offset, FrameReg);
       MI.setDesc(TII.get(SP::STDFri));
       MI.getOperand(2).setReg(SrcOddReg);
       Offset += 8;
@@ -201,7 +201,7 @@ SparcRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
       MachineInstr *StMI =
         BuildMI(*MI.getParent(), II, dl, TII.get(SP::LDDFri), DestEvenReg)
         .addReg(FrameReg).addImm(0);
-      replaceFI(MF, II, *StMI, dl, 1, Offset, FrameReg);
+      replaceFI(MF, *StMI, *StMI, dl, 1, Offset, FrameReg);
 
       MI.setDesc(TII.get(SP::LDDFri));
       MI.getOperand(0).setReg(DestOddReg);
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index 003848e34227..f7f29d85cbb2 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -669,13 +669,16 @@ WebAssemblyTargetLowering::LowerCall(CallLoweringInfo &CLI,
   if (IsVarArg) {
     // Outgoing non-fixed arguments are placed in a buffer. First
     // compute their offsets and the total amount of buffer space needed.
-    for (SDValue Arg :
-         make_range(OutVals.begin() + NumFixedArgs, OutVals.end())) {
+    for (unsigned I = NumFixedArgs; I < Outs.size(); ++I) {
+      const ISD::OutputArg &Out = Outs[I];
+      SDValue &Arg = OutVals[I];
       EVT VT = Arg.getValueType();
       assert(VT != MVT::iPTR && "Legalized args should be concrete");
       Type *Ty = VT.getTypeForEVT(*DAG.getContext());
+      unsigned Align = std::max(Out.Flags.getOrigAlign(),
+                                Layout.getABITypeAlignment(Ty));
       unsigned Offset = CCInfo.AllocateStack(Layout.getTypeAllocSize(Ty),
-                                             Layout.getABITypeAlignment(Ty));
+                                             Align);
       CCInfo.addLoc(CCValAssign::getMem(ArgLocs.size(), VT.getSimpleVT(),
                                         Offset, VT.getSimpleVT(),
                                         CCValAssign::Full));
diff --git a/contrib/llvm/lib/Target/X86/X86FastISel.cpp b/contrib/llvm/lib/Target/X86/X86FastISel.cpp
index 9dd3f2652543..12cd613c34cb 100644
--- a/contrib/llvm/lib/Target/X86/X86FastISel.cpp
+++ b/contrib/llvm/lib/Target/X86/X86FastISel.cpp
@@ -399,7 +399,7 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, X86AddressMode &AM,
   case MVT::v2i64:
   case MVT::v8i16:
   case MVT::v16i8:
-    if (IsNonTemporal && Alignment >= 16)
+    if (IsNonTemporal && Alignment >= 16 && HasSSE41)
       Opc = HasVLX ? X86::VMOVNTDQAZ128rm :
             HasAVX ? X86::VMOVNTDQArm : X86::MOVNTDQArm;
     else if (Alignment >= 16)
diff --git a/contrib/llvm/lib/Target/X86/X86TargetMachine.cpp b/contrib/llvm/lib/Target/X86/X86TargetMachine.cpp
index afcb49dc2263..217a12ddf896 100644
--- a/contrib/llvm/lib/Target/X86/X86TargetMachine.cpp
+++ b/contrib/llvm/lib/Target/X86/X86TargetMachine.cpp
@@ -38,6 +38,7 @@
 #include "llvm/IR/Attributes.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/Function.h"
+#include "llvm/MC/MCAsmInfo.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/CodeGen.h"
 #include "llvm/Support/CommandLine.h"
@@ -512,6 +513,9 @@ void X86PassConfig::addPreEmitPass2() {
   // correct CFA calculation rule where needed by inserting appropriate CFI
   // instructions.
   const Triple &TT = TM->getTargetTriple();
-  if (!TT.isOSDarwin() && !TT.isOSWindows())
+  const MCAsmInfo *MAI = TM->getMCAsmInfo();
+  if (!TT.isOSDarwin() &&
+      (!TT.isOSWindows() ||
+       MAI->getExceptionHandlingType() == ExceptionHandling::DwarfCFI))
     addPass(createCFIInstrInserter());
 }
diff --git a/contrib/llvm/tools/clang/lib/Basic/Version.cpp b/contrib/llvm/tools/clang/lib/Basic/Version.cpp
index a15c60e0f55c..1d594b974189 100644
--- a/contrib/llvm/tools/clang/lib/Basic/Version.cpp
+++ b/contrib/llvm/tools/clang/lib/Basic/Version.cpp
@@ -36,7 +36,7 @@ std::string getClangRepositoryPath() {
 
   // If the SVN_REPOSITORY is empty, try to use the SVN keyword. This helps us
   // pick up a tag in an SVN export, for example.
-  StringRef SVNRepository("$URL: https://llvm.org/svn/llvm-project/cfe/tags/RELEASE_800/final/lib/Basic/Version.cpp $");
+  StringRef SVNRepository("$URL: https://llvm.org/svn/llvm-project/cfe/branches/release_80/lib/Basic/Version.cpp $");
   if (URL.empty()) {
     URL = SVNRepository.slice(SVNRepository.find(':'),
                               SVNRepository.find("/lib/Basic"));
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGStmtOpenMP.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGStmtOpenMP.cpp
index eb1304d89345..44dc1cdee0b5 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -1518,8 +1518,9 @@ void CodeGenFunction::EmitOMPPrivateLoopCounters(
          I < E; ++I) {
       const auto *DRE = cast<DeclRefExpr>(C->getLoopCounter(I));
       const auto *VD = cast<VarDecl>(DRE->getDecl());
-      // Override only those variables that are really emitted already.
-      if (LocalDeclMap.count(VD)) {
+      // Override only those variables that can be captured to avoid re-emission
+      // of the variables declared within the loops.
+      if (DRE->refersToEnclosingVariableOrCapture()) {
         (void)LoopScope.addPrivate(VD, [this, DRE, VD]() {
           return CreateMemTemp(DRE->getType(), VD->getName());
         });
diff --git a/contrib/llvm/tools/clang/lib/Driver/ToolChains/Clang.cpp b/contrib/llvm/tools/clang/lib/Driver/ToolChains/Clang.cpp
index 589f53b11921..78ee7a78176f 100644
--- a/contrib/llvm/tools/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/contrib/llvm/tools/clang/lib/Driver/ToolChains/Clang.cpp
@@ -718,8 +718,9 @@ static void appendUserToPath(SmallVectorImpl<char> &Result) {
   Result.append(UID.begin(), UID.end());
 }
 
-static void addPGOAndCoverageFlags(Compilation &C, const Driver &D,
-                                   const InputInfo &Output, const ArgList &Args,
+static void addPGOAndCoverageFlags(const ToolChain &TC, Compilation &C,
+                                   const Driver &D, const InputInfo &Output,
+                                   const ArgList &Args,
                                    ArgStringList &CmdArgs) {
 
   auto *PGOGenerateArg = Args.getLastArg(options::OPT_fprofile_generate,
@@ -759,6 +760,11 @@ static void addPGOAndCoverageFlags(Compilation &C, const Driver &D,
                                            ProfileGenerateArg->getValue()));
     // The default is to use Clang Instrumentation.
     CmdArgs.push_back("-fprofile-instrument=clang");
+    if (TC.getTriple().isWindowsMSVCEnvironment()) {
+      // Add dependent lib for clang_rt.profile
+      CmdArgs.push_back(Args.MakeArgString("--dependent-lib=" +
+                                           TC.getCompilerRT(Args, "profile")));
+    }
   }
 
   if (PGOGenerateArg) {
@@ -4118,7 +4124,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
   // sampling, overhead of call arc collection is way too high and there's no
   // way to collect the output.
   if (!Triple.isNVPTX())
-    addPGOAndCoverageFlags(C, D, Output, Args, CmdArgs);
+    addPGOAndCoverageFlags(TC, C, D, Output, Args, CmdArgs);
 
   if (auto *ABICompatArg = Args.getLastArg(options::OPT_fclang_abi_compat_EQ))
     ABICompatArg->render(Args, CmdArgs);
diff --git a/contrib/llvm/tools/clang/lib/Driver/ToolChains/Linux.cpp b/contrib/llvm/tools/clang/lib/Driver/ToolChains/Linux.cpp
index 65ab9b2daf54..dfdfb18319ab 100644
--- a/contrib/llvm/tools/clang/lib/Driver/ToolChains/Linux.cpp
+++ b/contrib/llvm/tools/clang/lib/Driver/ToolChains/Linux.cpp
@@ -45,6 +45,7 @@ static std::string getMultiarchTriple(const Driver &D,
       TargetTriple.getEnvironment();
   bool IsAndroid = TargetTriple.isAndroid();
   bool IsMipsR6 = TargetTriple.getSubArch() == llvm::Triple::MipsSubArch_r6;
+  bool IsMipsN32Abi = TargetTriple.getEnvironment() == llvm::Triple::GNUABIN32;
 
   // For most architectures, just use whatever we have rather than trying to be
   // clever.
@@ -103,33 +104,37 @@ static std::string getMultiarchTriple(const Driver &D,
       return "aarch64_be-linux-gnu";
     break;
   case llvm::Triple::mips: {
-    std::string Arch = IsMipsR6 ? "mipsisa32r6" : "mips";
-    if (D.getVFS().exists(SysRoot + "/lib/" + Arch + "-linux-gnu"))
-      return Arch + "-linux-gnu";
+    std::string MT = IsMipsR6 ? "mipsisa32r6-linux-gnu" : "mips-linux-gnu";
+    if (D.getVFS().exists(SysRoot + "/lib/" + MT))
+      return MT;
     break;
   }
   case llvm::Triple::mipsel: {
     if (IsAndroid)
       return "mipsel-linux-android";
-    std::string Arch = IsMipsR6 ? "mipsisa32r6el" : "mipsel";
-    if (D.getVFS().exists(SysRoot + "/lib/" + Arch + "-linux-gnu"))
-      return Arch + "-linux-gnu";
+    std::string MT = IsMipsR6 ? "mipsisa32r6el-linux-gnu" : "mipsel-linux-gnu";
+    if (D.getVFS().exists(SysRoot + "/lib/" + MT))
+      return MT;
     break;
   }
   case llvm::Triple::mips64: {
-    std::string Arch = IsMipsR6 ? "mipsisa64r6" : "mips64";
-    std::string ABI = llvm::Triple::getEnvironmentTypeName(TargetEnvironment);
-    if (D.getVFS().exists(SysRoot + "/lib/" + Arch + "-linux-" + ABI))
-      return Arch + "-linux-" + ABI;
+    std::string MT = std::string(IsMipsR6 ? "mipsisa64r6" : "mips64") +
+                     "-linux-" + (IsMipsN32Abi ? "gnuabin32" : "gnuabi64");
+    if (D.getVFS().exists(SysRoot + "/lib/" + MT))
+      return MT;
+    if (D.getVFS().exists(SysRoot + "/lib/mips64-linux-gnu"))
+      return "mips64-linux-gnu";
     break;
   }
   case llvm::Triple::mips64el: {
     if (IsAndroid)
       return "mips64el-linux-android";
-    std::string Arch = IsMipsR6 ? "mipsisa64r6el" : "mips64el";
-    std::string ABI = llvm::Triple::getEnvironmentTypeName(TargetEnvironment);
-    if (D.getVFS().exists(SysRoot + "/lib/" + Arch + "-linux-" + ABI))
-      return Arch + "-linux-" + ABI;
+    std::string MT = std::string(IsMipsR6 ? "mipsisa64r6el" : "mips64el") +
+                     "-linux-" + (IsMipsN32Abi ? "gnuabin32" : "gnuabi64");
+    if (D.getVFS().exists(SysRoot + "/lib/" + MT))
+      return MT;
+    if (D.getVFS().exists(SysRoot + "/lib/mips64el-linux-gnu"))
+      return "mips64el-linux-gnu";
     break;
   }
   case llvm::Triple::ppc:
diff --git a/contrib/llvm/tools/clang/lib/Sema/SemaOpenMP.cpp b/contrib/llvm/tools/clang/lib/Sema/SemaOpenMP.cpp
index aedec746af9e..8a0be0c472de 100644
--- a/contrib/llvm/tools/clang/lib/Sema/SemaOpenMP.cpp
+++ b/contrib/llvm/tools/clang/lib/Sema/SemaOpenMP.cpp
@@ -4602,8 +4602,7 @@ DeclRefExpr *OpenMPIterationSpaceChecker::buildCounterVar(
       Captures.insert(std::make_pair(LCRef, Ref));
     return Ref;
   }
-  return buildDeclRefExpr(SemaRef, VD, VD->getType().getNonReferenceType(),
-                          DefaultLoc);
+  return cast<DeclRefExpr>(LCRef);
 }
 
 Expr *OpenMPIterationSpaceChecker::buildPrivateCounterVar() const {
diff --git a/contrib/llvm/tools/lld/COFF/Writer.cpp b/contrib/llvm/tools/lld/COFF/Writer.cpp
index 6acfaf9a4454..56b797451cfc 100644
--- a/contrib/llvm/tools/lld/COFF/Writer.cpp
+++ b/contrib/llvm/tools/lld/COFF/Writer.cpp
@@ -1351,19 +1351,47 @@ static void addSymbolToRVASet(SymbolRVASet &RVASet, Defined *S) {
 // symbol in an executable section.
 static void maybeAddAddressTakenFunction(SymbolRVASet &AddressTakenSyms,
                                          Symbol *S) {
-  auto *D = dyn_cast_or_null<DefinedCOFF>(S);
-
-  // Ignore undefined symbols and references to non-functions (e.g. globals and
-  // labels).
-  if (!D ||
-      D->getCOFFSymbol().getComplexType() != COFF::IMAGE_SYM_DTYPE_FUNCTION)
+  if (!S)
     return;
 
-  // Mark the symbol as address taken if it's in an executable section.
-  Chunk *RefChunk = D->getChunk();
-  OutputSection *OS = RefChunk ? RefChunk->getOutputSection() : nullptr;
-  if (OS && OS->Header.Characteristics & IMAGE_SCN_MEM_EXECUTE)
-    addSymbolToRVASet(AddressTakenSyms, D);
+  switch (S->kind()) {
+  case Symbol::DefinedLocalImportKind:
+  case Symbol::DefinedImportDataKind:
+    // Defines an __imp_ pointer, so it is data, so it is ignored.
+    break;
+  case Symbol::DefinedCommonKind:
+    // Common is always data, so it is ignored.
+    break;
+  case Symbol::DefinedAbsoluteKind:
+  case Symbol::DefinedSyntheticKind:
+    // Absolute is never code, synthetic generally isn't and usually isn't
+    // determinable.
+    break;
+  case Symbol::LazyKind:
+  case Symbol::UndefinedKind:
+    // Undefined symbols resolve to zero, so they don't have an RVA. Lazy
+    // symbols shouldn't have relocations.
+    break;
+
+  case Symbol::DefinedImportThunkKind:
+    // Thunks are always code, include them.
+    addSymbolToRVASet(AddressTakenSyms, cast<Defined>(S));
+    break;
+
+  case Symbol::DefinedRegularKind: {
+    // This is a regular, defined, symbol from a COFF file. Mark the symbol as
+    // address taken if the symbol type is function and it's in an executable
+    // section.
+    auto *D = cast<DefinedRegular>(S);
+    if (D->getCOFFSymbol().getComplexType() == COFF::IMAGE_SYM_DTYPE_FUNCTION) {
+      Chunk *RefChunk = D->getChunk();
+      OutputSection *OS = RefChunk ? RefChunk->getOutputSection() : nullptr;
+      if (OS && OS->Header.Characteristics & IMAGE_SCN_MEM_EXECUTE)
+        addSymbolToRVASet(AddressTakenSyms, D);
+    }
+    break;
+  }
+  }
 }
 
 // Visit all relocations from all section contributions of this object file and
diff --git a/contrib/llvm/tools/lld/ELF/Arch/PPC64.cpp b/contrib/llvm/tools/lld/ELF/Arch/PPC64.cpp
index 8a320c9a4e9e..cbfa8073d33f 100644
--- a/contrib/llvm/tools/lld/ELF/Arch/PPC64.cpp
+++ b/contrib/llvm/tools/lld/ELF/Arch/PPC64.cpp
@@ -113,6 +113,7 @@ public:
   void writeGotHeader(uint8_t *Buf) const override;
   bool needsThunk(RelExpr Expr, RelType Type, const InputFile *File,
                   uint64_t BranchAddr, const Symbol &S) const override;
+  uint32_t getThunkSectionSpacing() const override;
   bool inBranchRange(RelType Type, uint64_t Src, uint64_t Dst) const override;
   RelExpr adjustRelaxExpr(RelType Type, const uint8_t *Data,
                           RelExpr Expr) const override;
@@ -759,6 +760,14 @@ bool PPC64::needsThunk(RelExpr Expr, RelType Type, const InputFile *File,
   return !inBranchRange(Type, BranchAddr, S.getVA());
 }
 
+uint32_t PPC64::getThunkSectionSpacing() const {
+  // See comment in Arch/ARM.cpp for a more detailed explanation of
+  // getThunkSectionSpacing(). For PPC64 we pick the constant here based on
+  // R_PPC64_REL24, which is used by unconditional branch instructions.
+  // 0x2000000 = (1 << 24-1) * 4
+  return 0x2000000;
+}
+
 bool PPC64::inBranchRange(RelType Type, uint64_t Src, uint64_t Dst) const {
   int64_t Offset = Dst - Src;
   if (Type == R_PPC64_REL14)
diff --git a/contrib/llvm/tools/lld/ELF/InputSection.cpp b/contrib/llvm/tools/lld/ELF/InputSection.cpp
index 839bff7011eb..ca2f49c07bb7 100644
--- a/contrib/llvm/tools/lld/ELF/InputSection.cpp
+++ b/contrib/llvm/tools/lld/ELF/InputSection.cpp
@@ -248,6 +248,7 @@ void InputSectionBase::parseCompressedHeader() {
     }
 
     UncompressedSize = Hdr->ch_size;
+    Alignment = std::max<uint64_t>(Hdr->ch_addralign, 1);
     RawData = RawData.slice(sizeof(*Hdr));
     return;
   }
@@ -265,6 +266,7 @@ void InputSectionBase::parseCompressedHeader() {
   }
 
   UncompressedSize = Hdr->ch_size;
+  Alignment = std::max<uint64_t>(Hdr->ch_addralign, 1);
   RawData = RawData.slice(sizeof(*Hdr));
 }
 
@@ -578,10 +580,6 @@ static int64_t getTlsTpOffset() {
     // Variant 1. The thread pointer points to a TCB with a fixed 2-word size,
     // followed by a variable amount of alignment padding, followed by the TLS
     // segment.
-    //
-    // NB: While the ARM/AArch64 ABI formally has a 2-word TCB size, lld
-    // effectively increases the TCB size to 8 words for Android compatibility.
-    // It accomplishes this by increasing the segment's alignment.
     return alignTo(Config->Wordsize * 2, Out::TlsPhdr->p_align);
   case EM_386:
   case EM_X86_64:
diff --git a/contrib/llvm/tools/lld/ELF/SyntheticSections.cpp b/contrib/llvm/tools/lld/ELF/SyntheticSections.cpp
index 5eec83b0391b..13344949b2a0 100644
--- a/contrib/llvm/tools/lld/ELF/SyntheticSections.cpp
+++ b/contrib/llvm/tools/lld/ELF/SyntheticSections.cpp
@@ -2003,6 +2003,11 @@ template <class ELFT> void SymbolTableSection<ELFT>::writeTo(uint8_t *Buf) {
       ESym->setVisibility(Sym->Visibility);
     }
 
+    // The 3 most significant bits of st_other are used by OpenPOWER ABI.
+    // See getPPC64GlobalEntryToLocalEntryOffset() for more details.
+    if (Config->EMachine == EM_PPC64)
+      ESym->st_other |= Sym->StOther & 0xe0;
+
     ESym->st_name = Ent.StrTabOffset;
     ESym->st_shndx = getSymSectionIndex(Ent.Sym);
 
diff --git a/contrib/llvm/tools/lld/ELF/Writer.cpp b/contrib/llvm/tools/lld/ELF/Writer.cpp
index dcabf52e64f3..75d9dcabb407 100644
--- a/contrib/llvm/tools/lld/ELF/Writer.cpp
+++ b/contrib/llvm/tools/lld/ELF/Writer.cpp
@@ -547,6 +547,11 @@ static bool shouldKeepInSymtab(SectionBase *Sec, StringRef SymName,
   if (Config->Discard == DiscardPolicy::None)
     return true;
 
+  // If -emit-reloc is given, all symbols including local ones need to be
+  // copied because they may be referenced by relocations.
+  if (Config->EmitRelocs)
+    return true;
+
   // In ELF assembly .L symbols are normally discarded by the assembler.
   // If the assembler fails to do so, the linker discards them if
   // * --discard-locals is used.
@@ -2216,17 +2221,6 @@ template <class ELFT> void Writer<ELFT>::setPhdrs() {
     }
 
     if (P->p_type == PT_TLS && P->p_memsz) {
-      if (!Config->Shared &&
-          (Config->EMachine == EM_ARM || Config->EMachine == EM_AARCH64)) {
-        // On ARM/AArch64, reserve extra space (8 words) between the thread
-        // pointer and an executable's TLS segment by overaligning the segment.
-        // This reservation is needed for backwards compatibility with Android's
-        // TCB, which allocates several slots after the thread pointer (e.g.
-        // TLS_SLOT_STACK_GUARD==5). For simplicity, this overalignment is also
-        // done on other operating systems.
-        P->p_align = std::max<uint64_t>(P->p_align, Config->Wordsize * 8);
-      }
-
       // The TLS pointer goes after PT_TLS for variant 2 targets. At least glibc
       // will align it, so round up the size to make sure the offsets are
       // correct.
diff --git a/contrib/llvm/tools/llvm-objdump/llvm-objdump.cpp b/contrib/llvm/tools/llvm-objdump/llvm-objdump.cpp
index ba8d3c5b8d5c..9bd4528ef7f7 100644
--- a/contrib/llvm/tools/llvm-objdump/llvm-objdump.cpp
+++ b/contrib/llvm/tools/llvm-objdump/llvm-objdump.cpp
@@ -2087,20 +2087,38 @@ void llvm::printSymbolTable(const ObjectFile *O, StringRef ArchiveName,
       outs() << SectionName;
     }
 
-    outs() << '\t';
     if (Common || isa<ELFObjectFileBase>(O)) {
       uint64_t Val =
           Common ? Symbol.getAlignment() : ELFSymbolRef(Symbol).getSize();
-      outs() << format("\t %08" PRIx64 " ", Val);
+      outs() << format("\t%08" PRIx64, Val);
     }
 
-    if (Hidden)
-      outs() << ".hidden ";
+    if (isa<ELFObjectFileBase>(O)) {
+      uint8_t Other = ELFSymbolRef(Symbol).getOther();
+      switch (Other) {
+      case ELF::STV_DEFAULT:
+        break;
+      case ELF::STV_INTERNAL:
+        outs() << " .internal";
+        break;
+      case ELF::STV_HIDDEN:
+        outs() << " .hidden";
+        break;
+      case ELF::STV_PROTECTED:
+        outs() << " .protected";
+        break;
+      default:
+        outs() << format(" 0x%02x", Other);
+        break;
+      }
+    } else if (Hidden) {
+      outs() << " .hidden";
+    }
 
     if (Demangle)
-      outs() << demangle(Name) << '\n';
+      outs() << ' ' << demangle(Name) << '\n';
     else
-      outs() << Name << '\n';
+      outs() << ' ' << Name << '\n';
   }
 }